package com.changpei.core.utils;

import lombok.extern.slf4j.Slf4j;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;

/**
 * @author weibiao
 */
@Slf4j
public class JsoupUtil {

    /**
     * 获取文档对象
     *
     * @param pageUrl:网页地址
     * @return {@link Document}
     */
    public static Document getDocument(String pageUrl) {
        // 获取文档对象
        Document doc = null;
        try {
            Connection con = Jsoup.connect(pageUrl)
                    .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                    .header("Accept-Encoding", "gzip, deflate, sdch")
                    .header("Accept-Language", "zh-CN,zh;q=0.8")
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36")
                    .timeout(50000);

            Connection.Response response = con.execute();
            int success = 200;
            if (response.statusCode() == success) {
                doc = con.get();
            } else {
                log.info(String.valueOf(response.statusCode()));
                return null;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return doc;
    }
}
